Use fermented foods Chow-chow, Kimchi, and Kombucha as model systems to understand microbial diversity. These foods can be created in a public science environment to engage the public and teach about food science and microbial ecology.
Load packages and define ggplot theme settings
Create function to fix NAs at taxonomy levels
fixTaxaNAs <- function(df){
df_fixed <- df %>%
mutate(Class=case_when(!is.na(Class)~paste0("c_", Class)),
Order=case_when(!is.na(Order)~paste0("o_", Order)),
Family=case_when(!is.na(Family)~paste0("f_", Family)),
Species=case_when(!is.na(Genus)~replace_na(Species, "spp."))) %>%
mutate(Class=coalesce(Class, Phylum),
Order=coalesce(Order, Class),
Family=coalesce(Family, Order),
Genus=coalesce(Genus, Family)) %>%
unite(Species2, c(Genus, Species), remove = FALSE, na.rm=TRUE, sep=" ")
return(df_fixed)
}
Load data
Create shortcuts for substrates
Organize metadata
Figure 2
Clean up recipe labels
chowRecipes <- c("Tomato", "Onion", "Pepper", "Cabbage", "TomatoOnion", "TomatoPepper", "TomatoCabbage", "OnionPepper", "OnionCabbage", "PepperCabbage", "TomatoOnionPepper", "TomatoOnionCabbage", "TomatoPepperCabbage", "OnionPepperCabbage", "TomatoOnionPepperCabbage", "brine")
chowRecipeLabels <- c("tomato", "onion", "pepper", "cabbage", "tomato & onion", "tomato & pepper", "tomato & cabbage", "onion & pepper", "onion & cabbage", "pepper & cabbage", "tomato, onion & pepper", "tomato, onion & cabbage", "tomato, pepper & cabbage", "onion, pepper & cabbage", "tomato, onion, pepper & cabbage", "brine")
kimchiRecipes <- c("Cabbage", "Radish")
kimchiRecipeLabels <- c("cabbage", "radish")
teaRecipes <- c("green", "black", "greenblack", "starter", "scoby")
teaRecipeLabels <- c("green tea", "black tea", "green & black teas", "starter", "commercial scoby")
Samples, per experiment, per day
| Day | N samples |
|---|---|
| Chow chow | |
| 3 | 23 |
| 10 | 22 |
| Kimchi | |
| 3 | 22 |
| 10 | 22 |
| Kombucha | |
| 4 | 17 |
| 8 | 17 |
Levels per experiment
## [1] 16 2 5
sampleNtable <- metadata_16S %>%
filter(Experiment!="Lab", Experiment!="control") %>%
with_groups(c(Experiment, Plant), summarize, N=n_distinct(JarID)) %>%
mutate(Plant=factor(Plant, levels=c(chowRecipes, "Radish", teaRecipes), labels = c(chowRecipeLabels, "radish", teaRecipeLabels))) %>%
arrange(Experiment, Plant) %>%
select(-Experiment) %>%
kableExtra::kable(caption = "Table S1. Sample contents", col.names = c("Contents", "N samples")) %>%
row_spec(0, bold=TRUE) %>%
kable_classic(html_font = "Arial", full_width=FALSE) %>%
pack_rows("Chow chow", 1, 16, bold = TRUE) %>%
pack_rows("Kimchi", 17, 18, bold = TRUE) %>%
pack_rows("Kombucha", 19, 23, bold = TRUE)
sampleNtable
| Contents | N samples |
|---|---|
| Chow chow | |
| tomato | 1 |
| onion | 1 |
| pepper | 1 |
| cabbage | 1 |
| tomato & onion | 1 |
| tomato & pepper | 1 |
| tomato & cabbage | 1 |
| onion & pepper | 1 |
| onion & cabbage | 1 |
| pepper & cabbage | 2 |
| tomato, onion & pepper | 1 |
| tomato, onion & cabbage | 1 |
| tomato, pepper & cabbage | 1 |
| onion, pepper & cabbage | 2 |
| tomato, onion, pepper & cabbage | 6 |
| brine | 1 |
| Kimchi | |
| cabbage | 12 |
| radish | 10 |
| Kombucha | |
| green tea | 5 |
| black tea | 5 |
| green & black teas | 6 |
| starter | 1 |
| commercial scoby | 1 |
save_kable(sampleNtable, zoom=5, file = file.path(figure_out, paste0("TableS1_", Sys.Date(), "_sampleNs.png")))
## file:////private/var/folders/pv/v37hw47124qb8x1m1ryyh2fc0000gn/T/RtmptWOpj8/TableS1_2025-02-17_sampleNs11b0f38307106.html screenshot completed
ASV_16S1 <- ASV_16S0 %>%
left_join(newLactoNames) %>%
replace_na(list(newGenus="x", newSpecies="x")) %>%
mutate(Genus=case_when(newGenus!="x"~newGenus, # update to new genus name
newGenus=="x"~Genus),
Species=case_when(newGenus!="x"~newSpecies, # update to new species names
newGenus=="x"~Species)) %>%
select(-newGenus, -newSpecies) %>% # remove those columns
left_join(naLactoSpecSeqs) %>% # update genus names for lactobacilli that were not assigned at the species level
mutate(Genus=case_when(Genus=="Lactobacillus"&is.na(Species)~newGenus,
Genus=="Lactobacillus"&!is.na(Species)~Genus,
Genus!="Lactobacillus"~Genus)) %>%
select(-newGenus)
## Joining with `by = join_by(Genus, Species)`
## Joining with `by = join_by(seq, Genus, Species)`
nrow(ASV_16S1)
## [1] 578
There are a total of 578 16S ASVs before filtering.
Remove the following:
522 ASVs remain after filtering.
Filter dataframes to samples in chow-chow, kimchi, and tea experiments only.
#ASVs that appear in each experiment
experiment_16SASVs_df <-ASV_16S_expts0 %>%
gather("SampleID", "Abundance", one_of(metadata_16S$SampleID)) %>%
left_join(metadata_16S, by = "SampleID") %>%
filter(Abundance>0) %>%
select(Experiment, seq) %>%
unique
experiment_16SASVs_list <- experiment_16SASVs_df %>%
split(.$Experiment) %>%
map(~.x$seq)
ASV_16S_expts <- ASV_16S_expts0 %>%
gather("SampleID", "Reads", one_of(metadata_16S$SampleID)) %>%
left_join(select(metadata_16S, "Experiment", "SampleID"), by = "SampleID") %>%
right_join(experiment_16SASVs_df, by = c("seq", "Experiment")) # remove ASVs from each experiment that do not appear in respective samples
ASV_ITS <- ASV_ITS0 %>%
select(ASVID, seq, Kingdom, Phylum, Class, Order, Family, Genus, Species, one_of(metadata_ITS$SampleID))
There are 16 ITS ASVs.
| Phylum | Class | Order | Family | Genus | Species |
|---|---|---|---|---|---|
| p__Ascomycota | |||||
| p__Ascomycota | |||||
| p__Ascomycota | |||||
| p__Ascomycota | |||||
| p__Ascomycota | |||||
| p__Ascomycota | Saccharomycetes | Saccharomycetales | Pichiaceae | ||
| p__Ascomycota | Saccharomycetes | Saccharomycetales | Pichiaceae | ||
| p__Ascomycota | Saccharomycetes | Saccharomycetales | Saccharomycetales_fam_Incertae_sedis | Candida | inconspicua |
| p__Ascomycota | Saccharomycetes | Saccharomycetales | Saccharomycetales_fam_Incertae_sedis | Candida | sake |
| p__Ascomycota | Saccharomycetes | Saccharomycetales | Pichiaceae | Dekkera | anomala |
| p__Ascomycota | Saccharomycetes | Saccharomycetales | Pichiaceae | Dekkera | anomala |
| p__Ascomycota | Saccharomycetes | Saccharomycetales | Pichiaceae | Dekkera | anomala |
| p__Ascomycota | Saccharomycetes | Saccharomycetales | Pichiaceae | Dekkera | bruxellensis |
| p__Ascomycota | Saccharomycetes | Saccharomycetales | Pichiaceae | Dekkera | bruxellensis |
| p__Ascomycota | Saccharomycetes | Saccharomycetales | Saccharomycetales_fam_Incertae_sedis | Starmerella | davenportii |
| p__Basidiomycota | Tremellomycetes | Trichosporonales | Trichosporonaceae | Cutaneotrichosporon | curvatus |
All ASVs are assigned at the phylum level, and the phyla Ascomycota and Basidiomycota are represented. Most ASVs are Saccharomycetes. No filtering needed
ASV_df <- ASV_ITS %>%
gather("SampleID", "Reads", one_of(metadata_ITS$SampleID)) %>%
mutate(Experiment="Tea (Fungi)") %>%
full_join(ASV_16S_expts)
metadata_df <- metadata_16S %>%
full_join(metadata_ITS)
The ranges of library sizes varies by experiment, rarefying should be performed separately for alpha diversity analyses.
The minimum library sizes for rarefying are:
Figure S1: Library Sizes
bottom_row <- plot_grid(ggdraw() + draw_image(file.path(figure_out, "minLibSizeTable.png")), ggplot() + theme_void(), ncol = 2, labels = c("B", ""), label_size = 15)
plot_grid(libSizePlot, bottom_row, ncol = 1, rel_widths = 1, rel_heights = c(1,0.3), labels = c("A", "B"), label_size = 15)
#ggsave(filename = file.path(figure_out, paste0(Sys.Date(), "FigureS1_library_sizes.png")))
## $`Chow chow`
## NULL
##
## $Kimchi
## NULL
##
## $Kombucha
## NULL
##
## $`Kombucha (fungi)`
## NULL
raremax <- map(asvMatrices, ~min(rowSums(.x)))
vegan::rarecurve(asvMatrices$`Chow chow`, step = 20, sample = raremax$`Chow chow`, ylab = "ASVs", col = "blue", cex = 0.6, xlim=c(0,4000))
title(main="Chow Chow")
vegan::rarecurve(asvMatrices$Kimchi, step = 20, sample = raremax$Kimchi, ylab = "ASVs", label = FALSE, col = "blue", cex = 0.6, xlim=c(0,4000))
title(main="Kimchi")
vegan::rarecurve(asvMatrices$Kombucha, step = 20, sample = raremax$Kombucha, ylab = "ASVs", col = "blue", cex = 0.6, xlim=c(0,4000))
title(main="Kombucha")
vegan::rarecurve(asvMatrices$`Kombucha (fungi)`, step = 20, sample = raremax$`Kombucha (fungi)`, ylab = "ASVs", col = "blue", cex = 0.6, xlim=c(0,4000))
title(main="Kombucha (fungi)")
S <- map(asvMatrices, specnumber) # observed number of ASVs per sample
S %>%
map(~as_tibble(.x, rownames = "SampleID")) %>%
purrr::reduce(full_join) %>%
left_join(metadata_df) %>%
mutate(Experiment=factor(Experiment, levels=c("Chow", "Kimchi", "Tea", "Tea (Fungi)"), labels=c("Chow chow", "Kimchi", "Kombucha", "Kombucha (fungi)")),
ControlSample=factor(ControlSample, levels=c("sample", "brine", "starter", "scoby"), labels = c("sample", "brine", "starter", "commercial scoby"))) %>%
ggplot(aes(x=Experiment, y=value, color=ControlSample)) +
geom_boxplot(alpha=0, color="gray") +
geom_quasirandom(alpha=0.7, size=2, width=0.3) +
scale_color_manual(values=c("#000000", "#56B4E9", "#D55E00", "#009E73")) +
facet_wrap(~Primers, scales="free") +
labs(x=NULL, y="N unique ASVs", color="Sample type")+
theme(panel.grid.minor = element_blank(),
panel.grid.major.x = element_blank())
Rarefy to common read depth for each experiment
rarefied_asv_df <- map2(asvMatrices, raremax, rrarefy) %>%
map(~as_tibble(.x, rownames = "SampleID")) %>%
map(~gather(.x, "ASVID", "rarefiedReads", 2:ncol(.x))) %>%
purrr::reduce(full_join) %>%
left_join(ASV_df) %>%
left_join(metadata_df)
Rarefied richness vs. observed richness
rarefied_asv_df %>%
group_by(Experiment, SampleID) %>%
summarize_at(vars("Reads", "rarefiedReads"), ~sum(.x>0)) %>%
mutate(Experiment=factor(Experiment, levels=c("Chow", "Kimchi", "Tea", "Tea (Fungi)"), labels=c("Chow chow", "Kimchi", "Kombucha", "Kombucha (fungi)"))) %>%
ggplot(aes(x=Reads, y=rarefiedReads)) +
geom_abline(yintercept=0, slope = 1, color="gray", linetype=2) +
geom_point(shape=1) +
facet_wrap(~Experiment, scales="free") +
theme(aspect.ratio = 1) +
labs(x="Observed no. of ASVs", y="Rarefied no. of ASVs")
Rarefying doesn’t impact richness much. Largest impact is in kimchi
samples
Do not rarefy (McMurdie & Holmes, 2014) Normalize by calculating relative abundance Make ASV counts proportions
ASV_df_prop <- ASV_df %>%
with_groups(SampleID, mutate, Abundance=Reads/sum(Reads))
| Experiment | No. unique genera | No. unique ASVs |
|---|---|---|
| Chow chow | 80 | 228 |
| Kimchi | 39 | 190 |
| Kombucha | 48 | 95 |
| Kombucha (fungi) | 4 | 12 |
Table of top taxa
ASV_df_prop %>%
group_by(Experiment, ASVID, Family, Genus, Species) %>%
summarize(`Mean abundance`=mean(Abundance)) %>%
replace_na(list(Genus="", Species="")) %>%
group_by(Experiment) %>%
top_n(4, `Mean abundance`) %>%
mutate(`Mean abundance`=round(`Mean abundance`, 2)) %>%
arrange(Experiment, -`Mean abundance`) %>%
ungroup %>%
select(Family, Genus, Species, `Mean abundance`) %>%
kableExtra::kable() %>%
row_spec(0, bold=TRUE) %>%
kable_classic(html_font = "Arial", full_width=FALSE) %>%
pack_rows("Chow chow", 1, 4, bold = TRUE) %>%
pack_rows("Kimchi", 5, 8, bold = TRUE) %>%
pack_rows("Kombucha", 9, 12, bold = TRUE) %>%
pack_rows("Kombucha (fungi)", 13, 16, bold = TRUE)
| Family | Genus | Species | Mean abundance |
|---|---|---|---|
| Chow chow | |||
| Streptococcaceae | Lactococcus | lactis | 0.26 |
| Enterobacteriaceae | Klebsiella | 0.22 | |
| Leuconostocaceae | Weissella | cibaria | 0.12 |
| Leuconostocaceae | Leuconostoc | lactis | 0.08 |
| Kimchi | |||
| Lactobacillaceae | Latilactobacillus | sakei | 0.39 |
| Leuconostocaceae | Weissella | koreensis | 0.13 |
| Enterobacteriaceae | Klebsiella | 0.05 | |
| Lactobacillaceae | 0.04 | ||
| Kombucha | |||
| Acetobacteraceae | Komagataeibacter | 0.60 | |
| Acetobacteraceae | Komagataeibacter | hansenii | 0.29 |
| Acetobacteraceae | Komagataeibacter | 0.05 | |
| Acetobacteraceae | Komagataeibacter | 0.03 | |
| Kombucha (fungi) | |||
| Pichiaceae | Dekkera | bruxellensis | 0.98 |
| Pichiaceae | Dekkera | anomala | 0.01 |
| Pichiaceae | Dekkera | anomala | 0.01 |
| Pichiaceae | Dekkera | anomala | 0.00 |
Stacked bar plot
# get top Species and percents of species not in top
top_species <- ASV_df_prop %>%
fixTaxaNAs() %>%
with_groups(c(SampleID, Experiment, Species2), summarize, Abundance=sum(Abundance)) %>%
with_groups(c(Experiment, Species2), summarize, `Mean abundance`=mean(Abundance)) %>%
group_by(Experiment) %>%
top_n(9, `Mean abundance`) %>%
select(Experiment, Species2) %>%
filter(Species2!="p__Ascomycota")
ASV_df_prop %>%
fixTaxaNAs() %>%
with_groups(c(Experiment, Species2, SampleID), summarize, Abundance=sum(Abundance)) %>%
right_join(top_species) %>%
with_groups(c(Experiment, SampleID), summarize, x=sum(Abundance)) %>%
with_groups(Experiment, summarize, `Percent of reads`=mean(x))
## # A tibble: 4 × 2
## Experiment `Percent of reads`
## <chr> <dbl>
## 1 Chow 0.858
## 2 Kimchi 0.892
## 3 Tea 0.999
## 4 Tea (Fungi) 1.00
other <- ASV_df_prop %>%
fixTaxaNAs() %>%
right_join(top_species) %>%
group_by(SampleID) %>%
mutate(Abundance=1-sum(Abundance)) %>%
select(SampleID, Experiment, Abundance) %>%
unique %>%
mutate(Species2="Other")
# make factors for ordering samples
chowJars <- c( "brine", "T", "O", "P", "C", "TO", "TP", "TC", "OP1", "OC", "PC1", "PC2", "TOP", "TOC", "TPC", "OPC1", "OPC2", "TOPC1", "TOPC2", "TOPC3", "TOPC4", "TOPC5", "TOPC6")
kimchiJars <- c("KC1", "KC2", "KC3", "KC4", "KC5", "KC6", "KC7", "KC8", "KC9", "KC10", "KC11", "KC12", "KR1", "KR2", "KR3", "KR4", "KR5", "KR6", "KR7", "KR8", "KR9", "KR10")
teaJars <- c("G2", "G3", "G4", "G5", "G6", "B2", "B3", "B4", "B5", "B6", "GB2", "GB3", "GB4", "GB5", "GB6", "GB7", "starter", "SCOBYPeak")
chowColors <- c("gray40","#CC79A7", "#0072B2", "#D55E00", "#56B4E9", "#009E73", "#E69F00", "darkslateblue", "#F0E442", "darkgreen")
kimchiColors <- c("gray40", "#CC79A7", "darkblue", "khaki1", "mediumpurple4", "#009E73", "darkred", "lightskyblue3", "darkgreen", "salmon1")
kombuchaColors <- c("gray40", "#CC79A7", "indianred", "darkgoldenrod", "darkcyan", "darkblue","#0072B2", "mediumpurple4", "lightgoldenrod", "darkred")
kombuchaFungiColors <- c("gray40", "forestgreen", "chocolate3", "mediumorchid3", "blue3", "burlywood", "olivedrab4", "lightsteelblue")
barPlotColors <- list(chowColors,
kimchiColors,
kombuchaColors,
kombuchaFungiColors)
bar_plots <- ASV_df_prop %>%
select(-Reads) %>%
fixTaxaNAs() %>%
right_join(top_species) %>%
full_join(other) %>%
left_join(metadata_df) %>%
with_groups(Experiment, mutate, DayO=dense_rank(DayN)) %>%
mutate(DayO=factor(DayO, levels=c(1, 2), labels=c("First", "Second")),
JarID=factor(JarID, levels=c(chowJars, kimchiJars, teaJars)),
Experiment=factor(Experiment, levels=c("Chow", "Kimchi", "Tea", "Tea (Fungi)"), labels=c("Chow chow", "Kimchi", "Kombucha", "Kombucha (fungi)"))) %>%
replace_na(list(DayO="First")) %>%
split(.$Experiment) %>%
map(~mutate(.x, Species2=factor(Species2, levels = c("Other", sort(unique(grep("_|Other", Species2, value = TRUE, invert = TRUE))), sort(unique(grep("_", Species2, value = TRUE))))))) %>%
map(~mutate(.x, Species3=case_when(str_detect(Species2, "_|Other")~Species2,
!str_detect(Species2, "_|Other|spp")~paste0("*", Species2, "*"),
Species=="spp."~paste(paste0("*", Genus, "*"), Species)))) %>%
map(~mutate(.x, Species3=factor(Species3, levels = c("Other", sort(unique(grep("_|Other", Species3, value = TRUE, invert = TRUE))), sort(unique(grep("_", Species3, value = TRUE))))))) %>%
map2(., barPlotColors, ~ggplot(.x, aes(x=JarID, y=Abundance, fill=Species2)) +
geom_bar(stat="identity") +
facet_grid(DayO~Experiment, scales = "free_x") +
scale_fill_manual(values=.y, labels=levels(.$Species3)) +
theme(axis.text.x = element_text(angle=45, hjust=1, size=6),
axis.text.y = element_blank(),
legend.position = "bottom",
legend.direction = "vertical",
legend.text = element_markdown(),
strip.text.y = element_blank()) +
labs(x=NULL, y=NULL, fill="Species"))
plot_grid(bar_plots$`Chow chow` + theme(axis.text.y = element_text()) + labs(y="Relative abundance"),
bar_plots$Kimchi,
bar_plots$Kombucha,
bar_plots$`Kombucha (fungi)` + theme(strip.text.y = element_text()),
nrow=1,
rel_widths = c(length(chowJars), length(kimchiJars), length(teaJars), length(teaJars)-1),
align = "h",
axis = "bt")
#ggsave(filename = file.path(figure_out, paste0(Sys.Date(), "_Figure3_abundance_bar_plot.png")))
Komagataeibacter proportion of fungi in kombucha samples
ASV_df_prop %>%
filter(Genus=="Komagataeibacter",
Experiment=="Tea") %>%
left_join(metadata_df[,c("SampleID", "Day")]) %>%
with_groups(c(SampleID, Day), summarize, Abundance=sum(Abundance)) %>%
summarize(mean(Abundance))
## # A tibble: 1 × 1
## `mean(Abundance)`
## <dbl>
## 1 0.998
Dekkera bruxellensis proportion of fungi in kombucha samples
ASV_df_prop %>%
filter(Genus=="Dekkera",
Species=="bruxellensis",
Experiment=="Tea (Fungi)") %>%
left_join(metadata_df[,c("SampleID", "Day")]) %>%
with_groups(c(SampleID, Day), summarize, Abundance=sum(Abundance)) %>%
with_groups(Day, summarize, mean(Abundance))
## # A tibble: 2 × 2
## Day `mean(Abundance)`
## <chr> <dbl>
## 1 eight 0.981
## 2 four 0.983
Does alpha diversity increase as the number of plants in the recipes
increases?
+ Compare the samples from day 10 only for chow chow and day 8 for
kombucha. Cannot use both days in the same statistical test because they
are from the same sample. + Does alpha diversity increase as the number
of plants increases from 1 to 4? Test with Kruskall-Wallace for chow
chow and Wilcoxon ranked sum for kombucha.
diversity_df <- rarefied_asv_df %>%
filter(!is.na(DayN)) %>%
select(SampleID, Experiment, ASVID, rarefiedReads) %>%
nest(ASVmatrix=c("SampleID", "ASVID", "rarefiedReads")) %>%
mutate(ASVmatrix=map(ASVmatrix, ~spread(.x, ASVID, rarefiedReads)), #make a column of ASV matrices for vegan calculations
ASVmatrix=map(ASVmatrix, ~column_to_rownames(.x, var="SampleID")),
ASVmatrix=map(ASVmatrix, as.matrix)) %>%
mutate(richness=map(ASVmatrix, vegan::specnumber),
richness=map(richness, ~as_tibble(.x, rownames = "SampleID")),
shannon=map(ASVmatrix, ~vegan::diversity(.x, "shannon")),
shannon=map(shannon, ~as_tibble(.x, rownames = "SampleID"))) %>%
select(Experiment, richness, shannon) %>%
unnest(cols = c(richness, shannon), names_sep= "_") %>%
select(-shannon_SampleID) %>%
dplyr::rename(SampleID=richness_SampleID) %>%
left_join(metadata_df, by=c("Experiment", "SampleID"))
Perform statistical tests
# Kruskall wallace for chow chow
diversity_df %>%
filter(Experiment=="Chow",
DayN==10, # use second day only
ControlSample=="sample") %>%
mutate(nPlants=as.character(nPlants)) %>%
gather("measure", "value", c(richness_value, shannon_value)) %>%
group_by(measure, Experiment) %>%
kruskal_test(value~nPlants)
## # A tibble: 2 × 8
## Experiment measure .y. n statistic df p method
## * <chr> <chr> <chr> <int> <dbl> <int> <dbl> <chr>
## 1 Chow richness_value value 22 5.99 3 0.112 Kruskal-Wallis
## 2 Chow shannon_value value 22 4.79 3 0.188 Kruskal-Wallis
# not significant
# wilcox for tea
diversity_df %>%
filter(Experiment %in% c("Tea", "Tea (Fungi)"),
DayN==8, # use second day only
ControlSample=="sample") %>%
mutate(nPlants=as.character(nPlants)) %>%
gather("measure", "value", c(richness_value, shannon_value)) %>%
group_by(measure, Experiment) %>%
wilcox_test(value~nPlants)
## # A tibble: 4 × 9
## Experiment measure .y. group1 group2 n1 n2 statistic p
## * <chr> <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl>
## 1 Tea richness_value value 1 2 10 6 33.5 0.743
## 2 Tea (Fungi) richness_value value 1 2 10 6 35 0.571
## 3 Tea shannon_value value 1 2 10 6 28 0.875
## 4 Tea (Fungi) shannon_value value 1 2 10 6 29 0.958
# no significant differences
richness_nPlant_plot <- diversity_df %>%
filter(Experiment %in% c("Chow", "Tea", "Tea (Fungi)"),
DayN %in% c(8, 10),
ControlSample=="sample") %>%
mutate(Experiment=factor(Experiment, levels=c("Chow", "Tea", "Tea (Fungi)"), labels=c("Chow chow", "Kombucha", "Kombucha (fungi)")),
nPlants=as.character(nPlants)) %>%
ggplot(aes(x=nPlants, y=richness_value, group=nPlants)) +
geom_boxplot(alpha=0) +
geom_point(alpha=0.5) +
facet_wrap(~Experiment, scales = "free") +
labs(x=NULL, y="Richness")+
scale_y_continuous(limits = c(0, NA)) +
theme(axis.text.x = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major.x = element_blank())
shannon_nPlant_plot <- diversity_df %>%
filter(Experiment %in% c("Chow", "Tea", "Tea (Fungi)"),
DayN %in% c(8, 10),
ControlSample=="sample") %>%
mutate(Experiment=factor(Experiment, levels=c("Chow", "Tea", "Tea (Fungi)"), labels=c("Chow chow", "Kombucha", "Kombucha (fungi)")),
nPlants=as.character(nPlants)) %>%
ggplot(aes(x=nPlants, y=shannon_value, group=nPlants)) +
geom_boxplot(alpha=0) +
geom_point(alpha=0.5) +
scale_y_continuous(limits = c(0, NA)) +
facet_wrap(~Experiment, scales = "free") +
theme(strip.text.x = element_blank()) +
labs(x="No. of plant substrates", y="Shannon index")+
theme(panel.grid.minor = element_blank(),
panel.grid.major.x = element_blank())
plot_grid(richness_nPlant_plot, shannon_nPlant_plot, nrow=2)
#ggsave(filename = file.path(figure_out, paste0(Sys.Date(), "Figure4_alpha_vs_nsubstrates.png")))
Perform statistical tests
# Kruskall wallace for Tea
alpha_by_substrate_tea <- diversity_df %>%
filter(Experiment %in% c("Tea", "Tea (Fungi)"),
DayN==8,
ControlSample=="sample") %>%
mutate(Plant=factor(Plant, levels=c("green", "black", "greenblack"),
labels=c("Green", "Black", "Green & Black"))) %>%
gather("measure", "value", c(richness_value, shannon_value)) %>%
group_by(measure, Experiment) %>%
kruskal_test(value~Plant)
alpha_by_substrate_tea
## # A tibble: 4 × 8
## Experiment measure .y. n statistic df p method
## * <chr> <chr> <chr> <int> <dbl> <int> <dbl> <chr>
## 1 Tea richness_value value 16 5.80 2 0.0551 Kruskal-Wallis
## 2 Tea (Fungi) richness_value value 16 0.610 2 0.737 Kruskal-Wallis
## 3 Tea shannon_value value 16 4.85 2 0.0884 Kruskal-Wallis
## 4 Tea (Fungi) shannon_value value 16 0.294 2 0.863 Kruskal-Wallis
# not significant
# wilcox for kimchi
alpha_by_substrate_kimchi <- diversity_df %>%
filter(Experiment=="Kimchi",
DayN==10,
ControlSample=="sample") %>%
mutate(Plant=factor(Plant, levels=c("Cabbage", "Radish"),
labels=c("Cabbage", "Radish"))) %>%
gather("measure", "value", c(richness_value, shannon_value)) %>%
group_by(measure, Experiment) %>%
wilcox_test(value~Plant, alternative="two.sided") %>%
adjust_pvalue(p.col="p", method = "BH")
alpha_by_substrate_kimchi
## # A tibble: 2 × 10
## Experiment measure .y. group1 group2 n1 n2 statistic p p.adj
## <chr> <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 Kimchi richness… value Cabba… Radish 12 10 106 0.00257 0.00514
## 2 Kimchi shannon_… value Cabba… Radish 12 10 94 0.0249 0.0249
# Both significant
Plot
richness_Plant_plot <- diversity_df %>%
filter(Experiment %in% c("Kimchi", "Tea", "Tea (Fungi)"), DayN %in% c(8, 10), ControlSample=="sample") %>%
mutate(Experiment=factor(Experiment, levels=c("Chow", "Kimchi", "Tea", "Tea (Fungi)"), labels=c("Chow chow", "Kimchi", "Kombucha", "Kombucha (fungi)")),
Plant=factor(Plant, levels=c("Cabbage", "Radish", "green", "black", "greenblack"), labels=c("Cabbage", "Radish", "Green", "Black", "Green & Black")),
plabel=case_when(Experiment=="Kimchi"~"*",
Experiment!="Kimchi"~NA)) %>%
with_groups(Experiment, mutate, py=max(richness_value)) %>%
ggplot(aes(x=Plant, y=richness_value, group=Plant, label=plabel)) +
geom_boxplot(alpha=0) +
geom_point(alpha=0.5) +
geom_text(aes(x=1.5, y=py-2), size=5) +
facet_wrap(~Experiment, scales = "free") +
scale_y_continuous(limits = c(0, NA)) +
theme(axis.text.x = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major.x = element_blank()) +
labs(x=NULL, y="Richness")
shannon_Plant_plot <- diversity_df %>%
filter(Experiment %in% c("Kimchi", "Tea", "Tea (Fungi)"), DayN %in% c(8, 10), ControlSample=="sample") %>%
mutate(Experiment=factor(Experiment, levels=c("Chow", "Kimchi", "Tea", "Tea (Fungi)"), labels=c("Chow chow", "Kimchi", "Kombucha", "Kombucha (fungi)")),
Plant=factor(Plant, levels=c("Cabbage", "Radish", "green", "black", "greenblack"), labels=c("Cabbage", "Radish", "Green", "Black", "Green\n&\nBlack")),
plabel=case_when(Experiment=="Kimchi"~"*",
Experiment!="Kimchi"~NA)) %>%
with_groups(Experiment, mutate, py=max(shannon_value)) %>%
ggplot(aes(x=Plant, y=shannon_value, group=Plant, label=plabel)) +
geom_boxplot(alpha=0) +
geom_point(alpha=0.5) +
geom_text(aes(x=1.5, y=py-0.3), size=5) +
facet_wrap(~Experiment, scales = "free") +
scale_y_continuous(limits = c(0, NA)) +
theme(strip.text.x = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major.x = element_blank()) +
labs(x="Substrate", y="Shannon index")
substrate_alpha_plot <- plot_grid(richness_Plant_plot, shannon_Plant_plot, nrow=2, rel_heights = c(1, 1.27))
Calculate Bray-Curtis distances and visualize with NMDS Substrates (second day only)
set.seed(123)
nmds <- ASV_df_prop %>%
left_join(metadata_df) %>%
filter(Experiment %in% c("Kimchi", "Tea", "Tea (Fungi)"),
ControlSample=="sample",
DayN %in% c(8, 10)) %>%
select(SampleID, Experiment, ASVID, Abundance) %>%
split(.$Experiment) %>%
map(~select(.x, -Experiment)) %>%
map(~spread(.x, ASVID, Abundance)) %>%
map(~column_to_rownames(.x, var = "SampleID")) %>%
map(as.matrix) %>%
map(~metaMDS(.x, distance = "bray"))
## Run 0 stress 0.1208933
## Run 1 stress 0.1208933
## ... New best solution
## ... Procrustes: rmse 1.808388e-05 max resid 4.655066e-05
## ... Similar to previous best
## Run 2 stress 0.127527
## Run 3 stress 0.09663717
## ... New best solution
## ... Procrustes: rmse 0.1259782 max resid 0.4879174
## Run 4 stress 0.1208933
## Run 5 stress 0.1287598
## Run 6 stress 0.1268688
## Run 7 stress 0.1229075
## Run 8 stress 0.1289299
## Run 9 stress 0.1287597
## Run 10 stress 0.1241837
## Run 11 stress 0.127527
## Run 12 stress 0.1144112
## Run 13 stress 0.1296422
## Run 14 stress 0.1241837
## Run 15 stress 0.1268687
## Run 16 stress 0.1229076
## Run 17 stress 0.1199214
## Run 18 stress 0.1227962
## Run 19 stress 0.09663717
## ... Procrustes: rmse 8.986313e-06 max resid 2.542821e-05
## ... Similar to previous best
## Run 20 stress 0.1208933
## *** Best solution repeated 1 times
## Run 0 stress 0.01126436
## Run 1 stress 0.01126439
## ... Procrustes: rmse 3.331307e-05 max resid 0.0001063497
## ... Similar to previous best
## Run 2 stress 0.01126436
## ... New best solution
## ... Procrustes: rmse 9.097059e-05 max resid 0.0002582499
## ... Similar to previous best
## Run 3 stress 0.04072022
## Run 4 stress 0.04011555
## Run 5 stress 0.01126431
## ... New best solution
## ... Procrustes: rmse 5.095299e-05 max resid 0.0001061652
## ... Similar to previous best
## Run 6 stress 0.01126432
## ... Procrustes: rmse 4.954176e-05 max resid 0.0001633399
## ... Similar to previous best
## Run 7 stress 0.0112643
## ... New best solution
## ... Procrustes: rmse 8.783971e-06 max resid 2.86955e-05
## ... Similar to previous best
## Run 8 stress 0.01784174
## Run 9 stress 0.04072026
## Run 10 stress 0.01126438
## ... Procrustes: rmse 8.653706e-05 max resid 0.0002597349
## ... Similar to previous best
## Run 11 stress 0.01784186
## Run 12 stress 0.01126439
## ... Procrustes: rmse 8.987895e-05 max resid 0.0002412924
## ... Similar to previous best
## Run 13 stress 0.04072023
## Run 14 stress 0.3063679
## Run 15 stress 0.01784179
## Run 16 stress 0.04072018
## Run 17 stress 0.01126439
## ... Procrustes: rmse 0.0001018014 max resid 0.000343954
## ... Similar to previous best
## Run 18 stress 0.01126439
## ... Procrustes: rmse 0.0001002056 max resid 0.0003355294
## ... Similar to previous best
## Run 19 stress 0.01784187
## Run 20 stress 0.01126433
## ... Procrustes: rmse 5.383372e-05 max resid 0.000182465
## ... Similar to previous best
## *** Best solution repeated 6 times
## Run 0 stress 0.001558399
## Run 1 stress 0.002553419
## Run 2 stress 0.003591747
## Run 3 stress 0.005606527
## Run 4 stress 0.005925471
## Run 5 stress 0.003227221
## Run 6 stress 0.007715555
## Run 7 stress 0.005462563
## Run 8 stress 0.006489388
## Run 9 stress 0.003718972
## Run 10 stress 0.006054452
## Run 11 stress 0.003266549
## Run 12 stress 0.005830314
## Run 13 stress 0.002795648
## Run 14 stress 0.007130729
## Run 15 stress 0.003833059
## Run 16 stress 0.003509317
## Run 17 stress 0.007856374
## Run 18 stress 0.005312844
## Run 19 stress 0.006906621
## Run 20 stress 0.003073519
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 20: no. of iterations >= maxit
data.scores <- nmds %>%
map(~scores(.x)$sites) %>%
map(as.data.frame) %>%
map(~rownames_to_column(.x, var="SampleID")) %>%
map2(., names(.), ~mutate(.x, Experiment=.y)) %>%
purrr::reduce(full_join) %>%
left_join(metadata_df) %>%
mutate(Plant=factor(Plant, levels=c("Cabbage", "Radish", "green", "black", "greenblack"), labels=c("Cabbage", "Radish", "Green", "Black", "Green & Black")))
plantNMDSKimchi <- data.scores %>%
filter(Experiment=="Kimchi") %>%
ggplot(aes(x=NMDS1, y=NMDS2, color=Plant)) +
geom_point() +
facet_wrap(~Experiment, scales = "free") +
scale_color_manual(values=c("#0072B2", "#D55E00")) +
theme(legend.position = "none") +
labs(x=NULL, y=NULL)
kimchiLegend <- get_legend(
plantNMDSKimchi +
theme(legend.position = "bottom",
legend.direction = "vertical",
legend.justification = c(0,1)) +
labs(color="Kimchi substrates"))
plantNMDSTea3 <- data.scores %>%
filter(Experiment=="Tea") %>%
mutate(Experiment="Kombucha: all conditions") %>%
ggplot(aes(x=NMDS1, y=NMDS2, color=Plant)) +
geom_point() +
facet_wrap(~Experiment, scales = "free") +
scale_color_manual(values=c("#009E73","#000000", "#CC79A7")) +
theme(legend.position = "none") +
labs(y=NULL, x=NULL)
teaLegend <- get_legend(
plantNMDSTea3 +
#guides(color=guide_legend(ncol = 2)) +
theme(legend.position = "bottom",
legend.direction = "vertical",
legend.justification = c(0,1)) +
labs(color="Kombucha substrates"))
plantNMDSTeaITS <- data.scores %>%
filter(Experiment=="Tea (Fungi)") %>%
mutate(Experiment="Kombucha (fungi)") %>%
ggplot(aes(x=NMDS1, y=NMDS2, color=Plant)) +
geom_point() +
facet_wrap(~Experiment, scales = "free") +
scale_color_manual(values=c("#009E73","#000000", "#CC79A7")) +
theme(legend.position = "none",
axis.text.x = element_text(size=10)) +
labs(y=NULL, x=NULL)
#tea with only green or black samples only
nmds <- ASV_df_prop %>%
left_join(metadata_df) %>%
filter(Experiment %in% c("Tea", "Tea (Fungi)"),
ControlSample=="sample",
DayN==8,
Plant %in% c("green", "black")) %>%
select(SampleID, Experiment, ASVID, Abundance) %>%
split(.$Experiment) %>%
map(~select(.x, -Experiment)) %>%
map(~spread(.x, ASVID, Abundance)) %>%
map(~column_to_rownames(.x, var = "SampleID")) %>%
map(as.matrix) %>%
map(~metaMDS(.x, distance = "bray"))
## Run 0 stress 0.0001979636
## Run 1 stress 0.002999503
## Run 2 stress 0.001481388
## Run 3 stress 0.003854223
## Run 4 stress 0.0008800118
## Run 5 stress 0.004516088
## Run 6 stress 0.00161516
## Run 7 stress 0.004567798
## Run 8 stress 0.003015539
## Run 9 stress 0.004088506
## Run 10 stress 0.001841872
## Run 11 stress 0.001527751
## Run 12 stress 0.267585
## Run 13 stress 0.2909045
## Run 14 stress 0.001617847
## Run 15 stress 0.003780789
## Run 16 stress 0.004549937
## Run 17 stress 0.002416143
## Run 18 stress 0.001824797
## Run 19 stress 0.003793538
## Run 20 stress 0.002425196
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 17: no. of iterations >= maxit
## 2: stress ratio > sratmax
## 1: scale factor of the gradient < sfgrmin
## Run 0 stress 0
## Run 1 stress 9.717209e-05
## ... Procrustes: rmse 0.1368804 max resid 0.2899485
## Run 2 stress 9.775551e-05
## ... Procrustes: rmse 0.04444806 max resid 0.07970759
## Run 3 stress 8.986117e-05
## ... Procrustes: rmse 0.06600979 max resid 0.1072668
## Run 4 stress 0.0002962278
## ... Procrustes: rmse 0.05635873 max resid 0.1190616
## Run 5 stress 9.667155e-05
## ... Procrustes: rmse 0.06736211 max resid 0.1480469
## Run 6 stress 8.90455e-05
## ... Procrustes: rmse 0.1504335 max resid 0.3254534
## Run 7 stress 0.0004860919
## ... Procrustes: rmse 0.06615963 max resid 0.1073563
## Run 8 stress 9.295456e-05
## ... Procrustes: rmse 0.1152558 max resid 0.2325063
## Run 9 stress 8.521991e-05
## ... Procrustes: rmse 0.03167347 max resid 0.05122187
## Run 10 stress 9.775636e-05
## ... Procrustes: rmse 0.06554206 max resid 0.1401172
## Run 11 stress 9.809775e-05
## ... Procrustes: rmse 0.05452494 max resid 0.1284415
## Run 12 stress 0.0007590797
## Run 13 stress 9.979202e-05
## ... Procrustes: rmse 0.0767341 max resid 0.1528814
## Run 14 stress 9.908352e-05
## ... Procrustes: rmse 0.04052691 max resid 0.07716616
## Run 15 stress 9.984045e-05
## ... Procrustes: rmse 0.06071516 max resid 0.124504
## Run 16 stress 0.0002293478
## ... Procrustes: rmse 0.05709814 max resid 0.1424378
## Run 17 stress 0.0002877755
## ... Procrustes: rmse 0.06256337 max resid 0.1164542
## Run 18 stress 9.703947e-05
## ... Procrustes: rmse 0.04414064 max resid 0.09118404
## Run 19 stress 9.681282e-05
## ... Procrustes: rmse 0.04899204 max resid 0.07891642
## Run 20 stress 9.904094e-05
## ... Procrustes: rmse 0.05947261 max resid 0.1215823
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 5: no. of iterations >= maxit
## 15: stress < smin
data.scores <- nmds %>%
map(~scores(.x)$sites) %>%
map(as.data.frame) %>%
map(~rownames_to_column(.x, var="SampleID")) %>%
map2(., names(.), ~mutate(.x, Experiment=.y)) %>%
purrr::reduce(full_join) %>%
left_join(metadata_df) %>%
mutate(Plant=factor(Plant, levels=c("green", "black"), labels=c("Green", "Black")))
plantNMDSTea2 <- data.scores %>%
filter(Experiment=="Tea") %>%
mutate(Experiment="Kombucha: green OR black") %>%
ggplot(aes(x=NMDS1, y=NMDS2, color=Plant)) +
geom_point() +
facet_wrap(~Experiment, scales = "free") +
scale_color_manual(values=c("#009E73","#000000")) +
theme(legend.position = "none",
strip.text.x = element_text(size=9)) +
labs(x=NULL, y=NULL)
plantNMDSTeaITS2 <- data.scores %>%
filter(Experiment=="Tea (Fungi)") %>%
mutate(Experiment="Kombucha (f): green OR black") %>%
ggplot(aes(x=NMDS1, y=NMDS2, color=Plant)) +
geom_point() +
facet_wrap(~Experiment, scales = "free") +
scale_color_manual(values=c("#009E73","#000000")) +
theme(legend.position = "none",
axis.text.x = element_text(size=10),
strip.text.x = element_text(size=8)) +
labs(x=NULL, y=NULL)
plot
legends <- plot_grid(kimchiLegend, teaLegend, theme_void(), ncol=1, rel_heights = c(1, 1, 0.5))
row1 <- plot_grid(plantNMDSKimchi, plantNMDSTea3, plantNMDSTea2, nrow=1)
row2 <- plot_grid(plantNMDSTeaITS, plantNMDSTeaITS2, legends, nrow=1, rel_widths = c(1, 1, 0.7))
row12 <- plot_grid(row1, add_sub(row2, "NMDS1"), ncol=1, rel_heights = c(1, 1.1))
substrate_beta_plot <- plot_grid(add_sub(plot_grid(ggplot+theme_void()+theme(panel.grid.minor = element_blank()), row12, nrow=1, rel_widths = c(0.05,1)), "NMDS2", angle=90, x=0.05, y=3.9))
PERMANOVA
#tea grean, black, and green and black
bray_by_plant_data <- metadata_df %>%
filter(Experiment %in% c("Tea", "Tea (Fungi)"),
ControlSample=="sample",
DayN %in% c(8, 10)) %>%
column_to_rownames(var="SampleID") %>%
split(.$Experiment)
bray_by_plant_asvs <- ASV_df_prop %>%
left_join(metadata_df) %>%
filter(Experiment %in% c("Tea", "Tea (Fungi)"),
ControlSample=="sample",
DayN %in% c(8, 10)) %>%
select(Experiment, SampleID, ASVID, Abundance) %>%
split(.$Experiment) %>%
map(~spread(.x, ASVID, Abundance)) %>%
map(~select(.x, -Experiment)) %>%
map(~column_to_rownames(.x, var="SampleID"))
set.seed(021324)
map2(bray_by_plant_data, bray_by_plant_asvs, ~adonis2(formula=.y~nPlants+Plant, data=.x, permutations=999, method="bray"))
## $Tea
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = .y ~ nPlants + Plant, data = .x, permutations = 999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 2 0.41983 0.35997 3.6558 0.05 *
## Residual 13 0.74646 0.64003
## Total 15 1.16629 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $`Tea (Fungi)`
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = .y ~ nPlants + Plant, data = .x, permutations = 999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 2 0.00003613 0.01702 0.1126 0.93
## Residual 13 0.00208642 0.98298
## Total 15 0.00212255 1.00000
Number of plants not significant, so it was removed.
set.seed(021324)
map2(bray_by_plant_data, bray_by_plant_asvs, ~adonis2(formula=.y~Plant, data=.x, permutations=999, method="bray"))
## $Tea
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = .y ~ Plant, data = .x, permutations = 999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 2 0.41983 0.35997 3.6558 0.05 *
## Residual 13 0.74646 0.64003
## Total 15 1.16629 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $`Tea (Fungi)`
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = .y ~ Plant, data = .x, permutations = 999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 2 0.00003613 0.01702 0.1126 0.93
## Residual 13 0.00208642 0.98298
## Total 15 0.00212255 1.00000
#kimchi and 2 teas
bray_by_plant_data <- metadata_df %>%
filter(Experiment %in% c("Kimchi", "Tea", "Tea (Fungi)"),
ControlSample=="sample",
DayN %in% c(8, 10),
Plant!="greenblack") %>%
column_to_rownames(var="SampleID") %>%
split(.$Experiment)
bray_by_plant_asvs <- ASV_df_prop %>%
left_join(metadata_df) %>%
filter(Experiment %in% c("Kimchi", "Tea", "Tea (Fungi)"),
ControlSample=="sample",
DayN %in% c(8, 10),
Plant!="greenblack") %>%
select(Experiment, SampleID, ASVID, Abundance) %>%
split(.$Experiment) %>%
map(~spread(.x, ASVID, Abundance)) %>%
map(~select(.x, -Experiment)) %>%
map(~column_to_rownames(.x, var="SampleID"))
set.seed(021324)
map2(bray_by_plant_data, bray_by_plant_asvs, ~adonis2(formula=.y~Plant, data=.x, permutations=999, method="bray"))
## $Kimchi
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = .y ~ Plant, data = .x, permutations = 999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.62055 0.20049 5.0154 0.001 ***
## Residual 20 2.47461 0.79951
## Total 21 3.09516 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $Tea
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = .y ~ Plant, data = .x, permutations = 999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.37947 0.47689 7.2931 0.042 *
## Residual 8 0.41625 0.52311
## Total 9 0.79573 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $`Tea (Fungi)`
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = .y ~ Plant, data = .x, permutations = 999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.00002917 0.01558 0.1267 0.772
## Residual 8 0.00184244 0.98442
## Total 9 0.00187161 1.00000
plot_grid(substrate_alpha_plot, substrate_beta_plot, nrow = 2, rel_heights = c(0.8, 1), labels = c("A", "B"))
#ggsave(filename = file.path(figure_out, paste(Sys.Date(), "Figure5_substrate_figure.png", sep = "_")))
statistical tests
#Chow chow and tea
#kruskall wallis
pHkruskalls <- pH_df %>%
filter(Experiment=="Chow"|Experiment=="Tea",
ControlSample=="sample") %>%
with_groups(JarID, mutate, DayO=dense_rank(Day)) %>%
group_by(Experiment) %>%
kruskal_test(pH~DayO)
pHkruskalls
## # A tibble: 2 × 7
## Experiment .y. n statistic df p method
## * <chr> <chr> <int> <dbl> <int> <dbl> <chr>
## 1 Chow pH 71 47.6 2 4.67e-11 Kruskal-Wallis
## 2 Tea pH 64 57.8 3 1.7 e-12 Kruskal-Wallis
#dunns posthoc
pH_df %>%
filter(Experiment=="Chow"|Experiment=="Tea",
ControlSample=="sample") %>%
with_groups(JarID, mutate, DayO=dense_rank(Day)) %>%
group_by(Experiment) %>%
dunn_test(pH~DayO, p.adjust.method = "BH")
## # A tibble: 9 × 10
## Experiment .y. group1 group2 n1 n2 statistic p p.adj
## * <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 Chow pH 1 2 25 24 -4.95 7.28e- 7 1.09e- 6
## 2 Chow pH 1 3 25 22 -6.59 4.53e-11 1.36e-10
## 3 Chow pH 2 3 24 22 -1.73 8.43e- 2 8.43e- 2
## 4 Tea pH 1 2 16 16 -2.50 1.23e- 2 1.85e- 2
## 5 Tea pH 1 3 16 16 -4.85 1.21e- 6 3.64e- 6
## 6 Tea pH 1 4 16 16 -7.23 4.74e-13 2.85e-12
## 7 Tea pH 2 3 16 16 -2.35 1.87e- 2 1.87e- 2
## 8 Tea pH 2 4 16 16 -4.73 2.25e- 6 4.49e- 6
## 9 Tea pH 3 4 16 16 -2.38 1.74e- 2 1.87e- 2
## # ℹ 1 more variable: p.adj.signif <chr>
#kimchi
kimchipHinput <- pH_df %>%
filter(Experiment=="Kimchi",
ControlSample=="sample") %>%
with_groups(JarID, mutate, DayO=dense_rank(Day)) %>%
select(JarID, pH, DayO) %>%
spread(DayO, pH)
kimchipHwilcox <- wilcox.test(kimchipHinput$`1`, kimchipHinput$`2`, paired = TRUE, alternative = "greater")
kimchipHwilcox
##
## Wilcoxon signed rank exact test
##
## data: kimchipHinput$`1` and kimchipHinput$`2`
## V = 253, p-value = 2.384e-07
## alternative hypothesis: true location shift is greater than 0
kimchipHwilcox$p.value
## [1] 2.384186e-07
pH_pvalues <- tribble(~Experiment, ~p.value,
"Chow chow", pHkruskalls$p[pHkruskalls$Experiment=="Chow"],
"Kimchi", kimchipHwilcox$p.value,
"Kombucha", pHkruskalls$p[pHkruskalls$Experiment=="Tea"]) %>%
mutate(p.value=formatC(p.value, format = "e", digits = 2))
pH_pgroups <- tribble(~Experiment, ~Day, ~label,
"Chow chow", 0, "a",
"Chow chow", 3, "b",
"Chow chow", 10, "b",
"Kimchi", 3, "a",
"Kimchi", 10, "b",
"Kombucha", 0, "a",
"Kombucha", 4, "b",
"Kombucha", 8, "c",
"Kombucha", 21, "d")
Plot
pH_day_plot <- pH_df %>%
add_row(Experiment="Kimchi", pH=NULL, Day=0, ControlSample="sample") %>% # add fake row to make chow chow and kimchi x-axis scales the same
mutate(Experiment=factor(Experiment, levels=c("Chow", "Kimchi", "Tea", "Tea (Fungi)"), labels=c("Chow chow", "Kimchi", "Kombucha", "Kombucha (fungi)")),
ControlSample=factor(ControlSample, levels=c("sample", "brine", "starter"))) %>%
ggplot(aes(x=Day, y=pH)) +
geom_line(aes(group=JarID), color="gray") +
geom_point(aes(color=ControlSample), alpha=0.6) +
geom_text(data=pH_pgroups, aes(x=Day, y=7.6, label=label)) +
geom_text(data = pH_pvalues, aes(x=0, y=0.25, label=paste("p = ", p.value)), hjust="left") +
okabe_ito_colors +
scale_x_continuous(n.breaks = 6) +
scale_y_continuous(breaks = c(0, 2, 4, 6, 8), labels = c(0, 2, 4, 6, 8), limits = c(0,8)) +
facet_wrap(~Experiment, nrow=1, scales="free_x") +
theme(axis.text = element_text(size=11),
legend.title = element_text(size=15),
legend.text = element_text(size=13),
panel.grid.minor = element_blank()) +
labs(x="Day", y="pH", color="Sample type")
pH_day_plot
Does alpha diversity increase as the number of days elapsed
increases?
+ Test with Wilcoxon Rank-Sum test, comparing raw richness and Shannon
index between 3 and 10 days (4 and 8 for tea experiment)
Run paired wilcoxon test
diversity_df %>%
with_groups(JarID, filter, n_distinct(SampleID)=="2"|n_distinct(SampleID)==4) %>% # remove samples that don't have two timepoint measurements
with_groups(Experiment, mutate, DayO=dense_rank(DayN)) %>%
mutate(DayO=factor(DayO, levels=c(1, 2), labels=c("First", "Second"))) %>%
select(JarID, Experiment, DayO, richness_value, shannon_value) %>%
gather("measure", "value", c(richness_value, shannon_value)) %>%
spread(DayO, value) %>%
split(list(.$measure, .$Experiment)) %>%
map(~wilcox.test(.x$Second, .x$First, paired = TRUE, alternative = "greater")) %>%
map(tidy) %>%
map2(., names(.), ~mutate(.x, x=.y)) %>%
purrr::reduce(full_join) %>%
separate(x, into=c("measure", "Experiment"), sep="\\.") %>%
select(measure, Experiment, statistic, p.value) %>%
arrange(measure) %>%
group_by(Experiment) %>%
adjust_pvalue(p.col="p.value", method = "BH")
## # A tibble: 8 × 5
## Experiment measure statistic p.value p.value.adj
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Chow richness_value 120. 0.167 0.333
## 2 Chow shannon_value 124 0.538 0.538
## 3 Kimchi richness_value 175 0.0593 0.119
## 4 Kimchi shannon_value 100 0.806 0.806
## 5 Tea richness_value 75.5 0.0780 0.156
## 6 Tea shannon_value 33 0.983 0.983
## 7 Tea (Fungi) richness_value 17.5 0.294 0.294
## 8 Tea (Fungi) shannon_value 91 0.259 0.294
richness_day_plot <- diversity_df %>%
with_groups(JarID, filter, n_distinct(SampleID)=="2"|n_distinct(SampleID)==4) %>% # remove samples that don't have two timepoint measurements
mutate(Experiment=factor(Experiment, levels=c("Chow", "Kimchi", "Tea", "Tea (Fungi)"), labels=c("Chow chow", "Kimchi", "Kombucha", "Kombucha (fungi)")),
DayN=as.character(DayN),
DayN=factor(DayN, levels=c("3", "4", "8","10"))) %>%
ggplot(aes(x=DayN, y=richness_value)) +
geom_line(aes(group=JarID), color="gray") +
geom_point(alpha=0.5) +
scale_x_discrete(drop=TRUE) +
facet_wrap(~Experiment, scales = "free", nrow=1) +
scale_y_continuous(limits = c(0, NA)) +
theme(axis.text.x = element_blank(),
axis.text = element_text(size=11),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank()) +
labs(x=NULL, y="Richness")
shannon_day_plot <- diversity_df %>%
with_groups(JarID, filter, n_distinct(SampleID)=="2"|n_distinct(SampleID)==4) %>% # remove samples that don't have two timepoint measurements
mutate(Experiment=factor(Experiment, levels=c("Chow", "Kimchi", "Tea", "Tea (Fungi)"), labels=c("Chow chow", "Kimchi", "Kombucha", "Kombucha (fungi)")),
DayN=as.character(DayN),
DayN=factor(DayN, levels=c("3", "4", "8","10"))) %>%
ggplot(aes(x=DayN, y=shannon_value)) +
geom_line(aes(group=JarID), color="gray") +
geom_point(alpha=0.5) +
scale_x_discrete(drop=TRUE) +
facet_wrap(~Experiment, scales = "free", nrow = 1) +
scale_y_continuous(limits = c(0, NA)) +
theme(strip.text.x = element_blank(),
axis.text = element_text(size=11),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank()) +
labs(x="Day", y="Shannon index")
succession_alpha_plot <- plot_grid(richness_day_plot, shannon_day_plot, nrow=2, align = "v", axis="lr")
succession_alpha_plot
plot
set.seed(123)
nmds <- ASV_df_prop %>%
select(SampleID, Experiment, ASVID, Abundance) %>%
split(.$Experiment) %>%
map(~select(.x, -Experiment)) %>%
map(~spread(.x, ASVID, Abundance)) %>%
map(~column_to_rownames(.x, var = "SampleID")) %>%
map(as.matrix) %>%
map(~metaMDS(.x, distance = "bray"))
## Run 0 stress 0.1774703
## Run 1 stress 0.2146611
## Run 2 stress 0.1944882
## Run 3 stress 0.1759535
## ... New best solution
## ... Procrustes: rmse 0.08530666 max resid 0.5111269
## Run 4 stress 0.1962391
## Run 5 stress 0.17615
## ... Procrustes: rmse 0.09668807 max resid 0.5841405
## Run 6 stress 0.1751244
## ... New best solution
## ... Procrustes: rmse 0.1005305 max resid 0.6174738
## Run 7 stress 0.1759353
## Run 8 stress 0.1759481
## Run 9 stress 0.1957706
## Run 10 stress 0.2093671
## Run 11 stress 0.1759983
## Run 12 stress 0.1861465
## Run 13 stress 0.3929351
## Run 14 stress 0.1957543
## Run 15 stress 0.1947856
## Run 16 stress 0.2015187
## Run 17 stress 0.1759843
## Run 18 stress 0.2023919
## Run 19 stress 0.1753271
## ... Procrustes: rmse 0.008028507 max resid 0.03546564
## Run 20 stress 0.1774703
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 5: no. of iterations >= maxit
## 15: stress ratio > sratmax
## Run 0 stress 0.1460225
## Run 1 stress 0.160676
## Run 2 stress 0.1460225
## ... New best solution
## ... Procrustes: rmse 7.387893e-06 max resid 3.19048e-05
## ... Similar to previous best
## Run 3 stress 0.207423
## Run 4 stress 0.180311
## Run 5 stress 0.1614875
## Run 6 stress 0.191577
## Run 7 stress 0.1462274
## ... Procrustes: rmse 0.006019354 max resid 0.0297096
## Run 8 stress 0.1696063
## Run 9 stress 0.1930495
## Run 10 stress 0.1565812
## Run 11 stress 0.176654
## Run 12 stress 0.1943849
## Run 13 stress 0.173934
## Run 14 stress 0.173934
## Run 15 stress 0.177754
## Run 16 stress 0.1460225
## ... New best solution
## ... Procrustes: rmse 1.263504e-05 max resid 4.57652e-05
## ... Similar to previous best
## Run 17 stress 0.1460225
## ... New best solution
## ... Procrustes: rmse 1.177535e-05 max resid 5.474131e-05
## ... Similar to previous best
## Run 18 stress 0.2239994
## Run 19 stress 0.1620144
## Run 20 stress 0.1622134
## *** Best solution repeated 1 times
## Run 0 stress 0.02536552
## Run 1 stress 0.05379719
## Run 2 stress 0.02536552
## ... Procrustes: rmse 1.394032e-05 max resid 3.923488e-05
## ... Similar to previous best
## Run 3 stress 0.05302885
## Run 4 stress 0.02536552
## ... New best solution
## ... Procrustes: rmse 1.439869e-05 max resid 3.26021e-05
## ... Similar to previous best
## Run 5 stress 0.05302885
## Run 6 stress 0.02536553
## ... Procrustes: rmse 2.613274e-05 max resid 6.407172e-05
## ... Similar to previous best
## Run 7 stress 0.08879125
## Run 8 stress 0.05302891
## Run 9 stress 0.08879126
## Run 10 stress 0.09127608
## Run 11 stress 0.09594028
## Run 12 stress 0.02536552
## ... Procrustes: rmse 2.191914e-05 max resid 4.969234e-05
## ... Similar to previous best
## Run 13 stress 0.02536553
## ... Procrustes: rmse 3.079821e-05 max resid 7.090852e-05
## ... Similar to previous best
## Run 14 stress 0.09706119
## Run 15 stress 0.0530289
## Run 16 stress 0.05302881
## Run 17 stress 0.0676082
## Run 18 stress 0.09991109
## Run 19 stress 0.02536552
## ... New best solution
## ... Procrustes: rmse 7.703706e-06 max resid 2.204142e-05
## ... Similar to previous best
## Run 20 stress 0.09043775
## *** Best solution repeated 1 times
## Run 0 stress 0.007347678
## Run 1 stress 0.009895093
## Run 2 stress 0.01051935
## Run 3 stress 0.008798682
## Run 4 stress 0.009623193
## Run 5 stress 0.01150947
## Run 6 stress 0.01119637
## Run 7 stress 0.00850877
## Run 8 stress 0.00864702
## Run 9 stress 0.01302275
## Run 10 stress 0.008439507
## Run 11 stress 0.007494895
## ... Procrustes: rmse 0.01681193 max resid 0.06642044
## Run 12 stress 0.009389426
## Run 13 stress 0.01195031
## Run 14 stress 0.0110265
## Run 15 stress 0.007460751
## ... Procrustes: rmse 0.01627929 max resid 0.06412957
## Run 16 stress 0.01170512
## Run 17 stress 0.00966996
## Run 18 stress 0.01077525
## Run 19 stress 0.01039775
## Run 20 stress 0.01159429
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 20: no. of iterations >= maxit
data.scores <- nmds %>%
map(~scores(.x)$sites) %>%
map(as.data.frame) %>%
map(~rownames_to_column(.x, var="SampleID")) %>%
map2(., names(.), ~mutate(.x, Experiment=.y)) %>%
purrr::reduce(full_join) %>%
left_join(metadata_df) %>%
mutate(DayN=as.character(DayN)) %>%
replace_na(list(DayN="NA")) %>%
mutate(Experiment=factor(Experiment, levels=c("Chow", "Kimchi", "Tea", "Tea (Fungi)"), labels=c("Chow chow", "Kimchi", "Kombucha", "Kombucha (fungi)")),
DayN=factor(DayN, levels=c("NA", "3", "4", "8", "10")),
ControlSample=factor(ControlSample, levels=c("sample", "brine", "starter", "scoby"), labels=c("sample", "brine", "starter", "commercial scoby")))
succession_beta_plot <- data.scores %>%
ggplot(aes(x=NMDS1, y=NMDS2, color=DayN, shape=ControlSample)) +
geom_point() +
facet_wrap(~Experiment, scales = "free") +
okabe_ito_colors +
theme(axis.text = element_text(size=11),
legend.title = element_text(size=15),
legend.text = element_text(size=13),
panel.grid.minor = element_blank()) +
labs(color="Day", shape="Sample type")
succession_beta_plot
bray_by_day_data <- metadata_df %>%
filter(ControlSample=="sample") %>%
column_to_rownames(var="SampleID") %>%
split(.$Experiment)
bray_by_day_asvs <- ASV_df_prop %>%
left_join(metadata_df) %>%
filter(ControlSample=="sample") %>%
select(Experiment, SampleID, ASVID, Abundance) %>%
split(.$Experiment) %>%
map(~spread(.x, ASVID, Abundance)) %>%
map(~select(.x, -Experiment)) %>%
map(~column_to_rownames(.x, var="SampleID"))
set.seed(021324)
map2(bray_by_day_data, bray_by_day_asvs, ~adonis2(formula=.y~Day, data=.x, permutations=999, method="bray"))
## $Chow
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = .y ~ Day, data = .x, permutations = 999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.5867 0.06814 3.0713 0.007 **
## Residual 42 8.0239 0.93186
## Total 43 8.6106 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $Kimchi
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = .y ~ Day, data = .x, permutations = 999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.6929 0.07113 3.2162 0.009 **
## Residual 42 9.0488 0.92887
## Total 43 9.7417 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $Tea
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = .y ~ Day, data = .x, permutations = 999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 1 -0.00059 -0.00032 -0.0096 0.963
## Residual 30 1.84885 1.00032
## Total 31 1.84826 1.00000
##
## $`Tea (Fungi)`
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = .y ~ Day, data = .x, permutations = 999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.0000233 0.00712 0.2152 0.689
## Residual 30 0.0032499 0.99288
## Total 31 0.0032732 1.00000
plot_grid(pH_day_plot, succession_alpha_plot, succession_beta_plot, ncol=1, labels = c("A", "B", "C"), rel_heights = c(1, 1.4, 1.9))
#ggsave(filename = file.path(figure_out, paste(Sys.Date(), "FigureS3_succession_figure.png", sep="_")))
#10% of samples
sample10 <- metadata_df %>%
count(Experiment) %>%
mutate(n10=n/10)
sample10
## # A tibble: 4 × 3
## Experiment n n10
## <chr> <int> <dbl>
## 1 Chow 45 4.5
## 2 Kimchi 44 4.4
## 3 Tea 35 3.5
## 4 Tea (Fungi) 34 3.4
#significant differences in communities by day in Kimchi and Chow chow samples
indvday0 <- ASV_df_prop %>%
with_groups(c(Experiment, ASVID), filter, sum(Abundance>0)>=4) %>% # in 10% of samples
with_groups(c(Experiment, ASVID), filter, mean(Abundance)>=0.001) %>% #average abundance of 0.1%
left_join(metadata_df[,c("SampleID", "Day", "ControlSample")]) %>%
filter(ControlSample=="sample", Experiment %in% c("Chow", "Kimchi")) %>%
select(SampleID, Experiment, Day, ASVID, Abundance) %>%
split(.$Experiment) %>%
map(~spread(.x, ASVID, Abundance))
indvday_asv <- indvday0 %>%
map(~select(.x, -Day, -Experiment)) %>%
map(~column_to_rownames(.x, var="SampleID"))
indvday_groups <- indvday0 %>%
map(~.x$Day)
set.seed(0123)
indval_day <- map2(indvday_asv, indvday_groups, ~multipatt(.x, .y, control = how(nperm=999)))
indval_day_tables <- list(indval_day$Chow$sign, indval_day$Kimchi$sign) %>%
map2(., c("Chow", "Kimchi"), ~mutate(.x, Experiment=.y)) %>%
map(~rownames_to_column(.x, var="ASVID")) %>%
map(~adjust_pvalue(data=.x, p.col="p.value", method = "fdr")) %>%
map(~left_join(.x, unique(ASV_df_prop[,c("ASVID", "Phylum", "Class", "Order", "Family", "Genus", "Species")])))
names(indval_day_tables) <- c("Chow", "Kimchi")
#significant Chow
indval_day_tables$Chow %>%
filter(p.value.adj<0.05) %>%
mutate(Day=case_when(index==1~"ten",
index==2~"three")) %>%
select(Experiment, Day, ASVID, Family, Genus, Species, stat, p.value, p.value.adj) %>%
arrange(Day, p.value.adj)
## [1] Experiment Day ASVID Family Genus Species
## [7] stat p.value p.value.adj
## <0 rows> (or 0-length row.names)
#significant Tea
indval_day_tables$Kimchi %>%
filter(p.value.adj<0.05) %>%
mutate(Day=case_when(index==1~"ten",
index==2~"three")) %>%
select(Experiment, Day, ASVID, Family, Genus, Species, stat, p.value, p.value.adj) %>%
arrange(Day, p.value.adj)
## [1] Experiment Day ASVID Family Genus Species
## [7] stat p.value p.value.adj
## <0 rows> (or 0-length row.names)
No taxa are significantly associated with days.
#significant differences in communities by substrate in Kimchi and Tea samples
indvplant0 <- ASV_df_prop %>%
left_join(metadata_df[,c("SampleID", "Day", "Plant", "ControlSample")]) %>%
filter(ControlSample=="sample", Experiment %in% c("Kimchi", "Tea"), Day %in% c("eight", "ten")) %>%
group_by(ASVID) %>%
filter(case_when(Experiment=="Kimchi"~sum(Abundance>0)>=4,
Experiment=="Tea"~sum(Abundance>0)>=3)) %>% # in 10% of samples
with_groups(c(Experiment, ASVID), filter, mean(Abundance)>=0.001) %>% # average of 0.1% abundance
select(SampleID, Experiment, Plant, ASVID, Abundance) %>%
split(.$Experiment) %>%
map(~spread(.x, ASVID, Abundance))
indvplant_asv <- indvplant0 %>%
map(~select(.x, -Plant, -Experiment)) %>%
map(~column_to_rownames(.x, var="SampleID"))
indvplant_groups <- indvplant0 %>%
map(~.x$Plant)
set.seed(0123)
indval_plant <- map2(indvplant_asv, indvplant_groups, ~multipatt(.x, .y, control = how(nperm=999)))
indval_plant_tables <- list(indval_plant$Kimchi$sign, indval_plant$Tea$sign) %>%
map2(., c("Kimchi", "Tea"), ~mutate(.x, Experiment=.y)) %>%
map(~rownames_to_column(.x, var="ASVID")) %>%
map(~adjust_pvalue(data=.x, p.col="p.value", method = "fdr")) %>%
map(~left_join(.x, unique(ASV_df_prop[,c("ASVID", "Phylum", "Class", "Order", "Family", "Genus", "Species")])))
names(indval_plant_tables) <- c("Kimchi", "Tea")
#significant Kimchi
indKimchiASVs <- indval_plant_tables$Kimchi %>%
filter(p.value.adj<0.05) %>%
.$ASVID
indKimchiAbs <- ASV_df_prop %>%
filter(ASVID %in% indKimchiASVs, Experiment=="Kimchi") %>%
left_join(metadata_df[,c("SampleID", "Plant")]) %>%
with_groups(c(ASVID, Plant), summarize, mean=round(mean(Abundance)*100, 2)) %>%
spread(Plant, mean)
indval_kimchi_table <- indval_plant_tables$Kimchi %>%
filter(p.value.adj<0.05) %>%
left_join(indKimchiAbs) %>%
mutate(Substrate=case_when(index==1~"Cabbage",
index==2~"Radish"),
stat=round(stat, 3),
p.value.adj=round(p.value.adj, 3)) %>%
arrange(Substrate, Family, Genus, Species) %>%
mutate(` `=row_number()) %>%
select(` `, Family, Genus, Species, stat, p.value, p.value.adj, Cabbage, Radish) %>%
dplyr::rename(`p value`=p.value,
`q value`=p.value.adj,
`Mean\ncabbage\nabund. (%)`=Cabbage,
`Mean\nradish\nabund. (%)`=Radish) %>%
replace_na(list(Genus="", Species="")) %>%
kableExtra::kable(caption="Significant Indicator ASVs in Kimchi", align = "l") %>%
row_spec(0, bold=TRUE) %>%
kable_classic(html_font = "Arial", full_width=FALSE) %>%
pack_rows("Cabbage", 1, 11, bold = TRUE) %>%
pack_rows("Radish", 12, 13, bold = TRUE)
indval_kimchi_table
| Family | Genus | Species | stat | p value | q value | Mean cabbage abund. (% | |Mean radish abund. | |
|---|---|---|---|---|---|---|---|---|
| Cabbage | ||||||||
| 1 | Acetobacteraceae | Komagataeibacter | 0.938 | 0.001 | 0.003 | 0.54 | 0.08 | |
| 2 | Lactobacillaceae | 0.866 | 0.007 | 0.015 | 0.54 | 0.04 | ||
| 3 | Lactobacillaceae | 0.901 | 0.002 | 0.006 | 0.94 | 0.08 | ||
| 4 | Lactobacillaceae | 0.911 | 0.001 | 0.003 | 1.75 | 0.11 | ||
| 5 | Lactobacillaceae | 0.969 | 0.001 | 0.003 | 2.70 | 0.13 | ||
| 6 | Leuconostocaceae | Leuconostoc | mesenteroides | 0.980 | 0.001 | 0.003 | 4.88 | 0.07 |
| 7 | Leuconostocaceae | Leuconostoc | 0.878 | 0.007 | 0.015 | 1.08 | 0.04 | |
| 8 | Leuconostocaceae | Weissella | viridescens | 0.816 | 0.006 | 0.015 | 0.61 | 0.00 |
| 9 | Leuconostocaceae | Weissella | viridescens | 0.993 | 0.001 | 0.003 | 5.30 | 0.07 |
| 10 | Leuconostocaceae | Weissella | viridescens | 0.975 | 0.001 | 0.003 | 2.64 | 0.13 |
| 11 | Streptococcaceae | Lactococcus | lactis | 0.971 | 0.026 | 0.048 | 0.85 | 0.07 |
| Radish | ||||||||
| 12 | Erwiniaceae | Pantoea | vagans | 0.906 | 0.019 | 0.038 | 0.68 | 1.47 |
| 13 | Lactobacillaceae | Latilactobacillus | curvatus | 0.977 | 0.001 | 0.003 | 0.49 | 6.20 |
#save_kable(indval_kimchi_table, zoom=5, file = file.path(figure_out, paste0(Sys.Date(), "Table1_indval_table.png")))
#significant Tea
indval_plant_tables$Tea %>%
filter(p.value.adj<0.05) %>%
mutate(Substrate=case_when(index==1~"Black",
index==2~"Green",
index==3~"Green & Black")) %>%
select(Substrate, Family, Genus, Species, stat, p.value, p.value.adj) %>%
arrange(Substrate, p.value.adj)
## [1] Substrate Family Genus Species stat p.value
## [7] p.value.adj
## <0 rows> (or 0-length row.names)
11 ASVs associated with cabbage and 2 ASVs associated with radish. 3 of the cabbage-assicaited ASVs were Weissella viridescens.
indASVorderDF <- indval_plant_tables$Kimchi %>%
filter(p.value.adj<0.05) %>%
mutate(Substrate=case_when(index==1~"Cabbage",
index==2~"Radish")) %>%
arrange(Substrate, Family, Genus, Species) %>%
replace_na(list(Species="")) %>%
mutate(rown=row_number(),
Family=case_when(!is.na(Family)~paste0("f_", Family)),
Genus=coalesce(Genus, Family),
Species2=paste(rown, Genus, Species)) %>%
select(ASVID, Species2)
ASV_df_prop %>%
filter(Experiment=="Kimchi") %>%
filter(ASVID %in% indKimchiASVs) %>%
left_join(metadata_df[,c("SampleID", "Plant")]) %>%
left_join(indASVorderDF) %>%
mutate(Species2=factor(Species2, levels=indASVorderDF$Species2)) %>%
ggplot(aes(x=Plant, y=Abundance)) +
geom_boxplot(alpha=0) +
geom_beeswarm(alpha=0.4, cex = 5, corral = "wrap") +
facet_wrap(~Species2, scales = "free_y", nrow=3) +
theme(axis.text.x = element_text(size=11),
strip.text.x = element_text(size=9),
panel.grid.minor = element_blank()) +
labs(x=NULL)
#ggsave(filename = file.path(figure_out, paste(Sys.Date(), "FigureS2_indval_abundances.png", sep="_")))
## R version 4.4.1 (2024-06-14)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sonoma 14.4
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] magick_2.8.5 ggtext_0.1.2 cowplot_1.1.3
## [4] ggbeeswarm_0.7.2 kableExtra_1.4.0 formattable_0.2.1
## [7] indicspecies_1.7.15 rstatix_0.7.2 vegan_2.6-8
## [10] lattice_0.22-6 permute_0.9-7 lubridate_1.9.3
## [13] forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
## [16] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1
## [19] tibble_3.2.1 ggplot2_3.5.1 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.1 viridisLite_0.4.2 farver_2.1.2 vipor_0.4.7
## [5] fastmap_1.2.0 webshot2_0.1.1 promises_1.3.2 digest_0.6.36
## [9] timechange_0.3.0 lifecycle_1.0.4 cluster_2.1.6 processx_3.8.4
## [13] magrittr_2.0.3 compiler_4.4.1 rlang_1.1.4 sass_0.4.9
## [17] tools_4.4.1 utf8_1.2.4 yaml_2.3.10 knitr_1.48
## [21] labeling_0.4.3 htmlwidgets_1.6.4 bit_4.0.5 xml2_1.3.6
## [25] websocket_1.4.2 abind_1.4-8 withr_3.0.1 grid_4.4.1
## [29] fansi_1.0.6 colorspace_2.1-1 scales_1.3.0 MASS_7.3-60.2
## [33] cli_3.6.3 rmarkdown_2.27 crayon_1.5.3 generics_0.1.3
## [37] rstudioapi_0.16.0 tzdb_0.4.0 commonmark_1.9.2 chromote_0.4.0
## [41] cachem_1.1.0 splines_4.4.1 parallel_4.4.1 vctrs_0.6.5
## [45] Matrix_1.7-0 jsonlite_1.8.8 carData_3.0-5 car_3.1-3
## [49] hms_1.1.3 bit64_4.0.5 Formula_1.2-5 beeswarm_0.4.0
## [53] systemfonts_1.1.0 jquerylib_0.1.4 glue_1.7.0 ps_1.7.7
## [57] stringi_1.8.4 gtable_0.3.5 later_1.4.1 munsell_0.5.1
## [61] pillar_1.9.0 htmltools_0.5.8.1 R6_2.5.1 vroom_1.6.5
## [65] evaluate_0.24.0 markdown_1.13 highr_0.11 backports_1.5.0
## [69] gridtext_0.1.5 broom_1.0.6 bslib_0.8.0 Rcpp_1.0.14
## [73] svglite_2.1.3 nlme_3.1-164 mgcv_1.9-1 xfun_0.46
## [77] pkgconfig_2.0.3